DATA1901 Project 1 Group 5 Submission

Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(fmsb))

# Read the `pooled123` sheet of the project workbook.
data <- read_excel(path = "Project_1_Data.xlsx", sheet = "pooled123")

# Keep the identifier, SSQ item and demographic columns, then retain only the
# participants that did not undergo social modelling conditions.
filteredData <- data %>%
  select(
    Date, PID,
    BSSQ_1:BSSQ_16,
    ASSQ_1:ASSQ_16,
    age, VRexperience, ssq_full, gender, sm
  ) %>%
  filter(sm == "NO_SM")

# Per-symptom change between the active (ASSQ) and baseline (BSSQ) item scores.
# Each d_<symptom> column is ASSQ_<n> - BSSQ_<n> for the matching item number.
filteredData <- filteredData %>%
  mutate(
    d_discomfort = ASSQ_1 - BSSQ_1,
    d_fatigue = ASSQ_2 - BSSQ_2,
    d_headache = ASSQ_3 - BSSQ_3,
    d_eyestrain = ASSQ_4 - BSSQ_4,
    d_difficulty_focusing = ASSQ_5 - BSSQ_5,
    d_salivation = ASSQ_6 - BSSQ_6,
    d_sweating = ASSQ_7 - BSSQ_7,
    d_nausea = ASSQ_8 - BSSQ_8,
    d_difficulty_concentrating = ASSQ_9 - BSSQ_9,
    d_fullness_of_head = ASSQ_10 - BSSQ_10,
    d_blurred_vision = ASSQ_11 - BSSQ_11,
    d_dizziness_o = ASSQ_12 - BSSQ_12,
    d_dizziness_c = ASSQ_13 - BSSQ_13,
    d_vertigo = ASSQ_14 - BSSQ_14,
    d_stomach_awareness = ASSQ_15 - BSSQ_15,
    d_burping = ASSQ_16 - BSSQ_16
  )

# Reclass VR experience as a factor (it is read in as character).
filteredData$VRexperience <- as.factor(filteredData$VRexperience)

# Split participants into age groups so that mean changes can later be
# calculated per group for each symptom.
#
# The conditions are evaluated top to bottom, so each upper bound only needs
# to be checked once. Unlike closed integer intervals (16-21, 22-29, ...),
# this cascade has no gaps: a non-integer age such as 21.5 lands in
# "16 to 21" instead of silently becoming NA. Ages below 16 and missing ages
# remain NA.
filteredData <- mutate(filteredData, age_group = case_when(
  age < 16 ~ NA_character_,
  age < 22 ~ "16 to 21",
  age < 30 ~ "22 to 29",
  age < 38 ~ "30 to 37",
  age <= 45 ~ "38 to 45",
  age > 45 ~ "above 45"
))

# Convert the age groups into a factor.
filteredData$age_group <- as.factor(filteredData$age_group)

# Rename the numbered SSQ item columns to be more informative:
# BSSQ_<n> / ASSQ_<n> become BSSQ_<symptom> / ASSQ_<symptom>.
# A column that is not present is simply skipped.
filteredData <- local({
  # Symptom labels in item order (position i labels item i).
  symptoms <- c(
    "discomfort", "fatigue", "headache", "eyestrain",
    "difficulty_focusing", "salivation", "sweating", "nausea",
    "difficulty_concentrating", "fullness_of_head", "blurred_vision",
    "dizziness_o", "dizziness_c", "vertigo", "stomach_awareness", "burping"
  )
  item_numbers <- seq_along(symptoms)

  # Lookup: old column name -> new column name.
  relabel <- c(
    setNames(paste0("BSSQ_", symptoms), paste0("BSSQ_", item_numbers)),
    setNames(paste0("ASSQ_", symptoms), paste0("ASSQ_", item_numbers))
  )

  renamed <- filteredData
  to_rename <- names(renamed) %in% names(relabel)
  names(renamed)[to_rename] <- unname(relabel[names(renamed)[to_rename]])
  renamed
})


# Read the data dictionary sheet from the same workbook.
data_dict <- read_excel(path = "Project_1_Data.xlsx", sheet = "data_dictionary")

# Partition the participants by whether they report prior VR experience.
withVRexperience <- filteredData %>% filter(VRexperience == "Yes")
noVRexperience <- filteredData %>% filter(VRexperience == "No")

# Mean per-symptom change (ASSQ - BSSQ) for participants with and without
# prior VR experience.
#
# na.rm = TRUE: a single missing item response would otherwise turn the whole
# symptom mean into NA; the mean is taken over the participants who answered.

# With VR experience
yes_avg_d_discomfort <- mean(withVRexperience$d_discomfort, na.rm = TRUE)
yes_avg_d_fatigue <- mean(withVRexperience$d_fatigue, na.rm = TRUE)
yes_avg_d_headache <- mean(withVRexperience$d_headache, na.rm = TRUE)
yes_avg_d_eyestrain <- mean(withVRexperience$d_eyestrain, na.rm = TRUE)
yes_avg_d_difficulty_focusing <- mean(withVRexperience$d_difficulty_focusing, na.rm = TRUE)
yes_avg_d_salivation <- mean(withVRexperience$d_salivation, na.rm = TRUE)
yes_avg_d_sweating <- mean(withVRexperience$d_sweating, na.rm = TRUE)
yes_avg_d_nausea <- mean(withVRexperience$d_nausea, na.rm = TRUE)
yes_avg_d_difficulty_concentrating <- mean(withVRexperience$d_difficulty_concentrating, na.rm = TRUE)
yes_avg_d_fullness <- mean(withVRexperience$d_fullness_of_head, na.rm = TRUE)
yes_avg_d_vision <- mean(withVRexperience$d_blurred_vision, na.rm = TRUE)
yes_avg_d_dizziness_o <- mean(withVRexperience$d_dizziness_o, na.rm = TRUE)
yes_avg_d_dizziness_c <- mean(withVRexperience$d_dizziness_c, na.rm = TRUE)
yes_avg_d_vertigo <- mean(withVRexperience$d_vertigo, na.rm = TRUE)
yes_avg_d_stomach <- mean(withVRexperience$d_stomach_awareness, na.rm = TRUE)
yes_avg_d_burping <- mean(withVRexperience$d_burping, na.rm = TRUE)

# Without VR experience
no_avg_d_discomfort <- mean(noVRexperience$d_discomfort, na.rm = TRUE)
no_avg_d_fatigue <- mean(noVRexperience$d_fatigue, na.rm = TRUE)
no_avg_d_headache <- mean(noVRexperience$d_headache, na.rm = TRUE)
no_avg_d_eyestrain <- mean(noVRexperience$d_eyestrain, na.rm = TRUE)
no_avg_d_difficulty_focusing <- mean(noVRexperience$d_difficulty_focusing, na.rm = TRUE)
no_avg_d_salivation <- mean(noVRexperience$d_salivation, na.rm = TRUE)
no_avg_d_sweating <- mean(noVRexperience$d_sweating, na.rm = TRUE)
no_avg_d_nausea <- mean(noVRexperience$d_nausea, na.rm = TRUE)
no_avg_d_difficulty_concentrating <- mean(noVRexperience$d_difficulty_concentrating, na.rm = TRUE)
no_avg_d_fullness <- mean(noVRexperience$d_fullness_of_head, na.rm = TRUE)
no_avg_d_vision <- mean(noVRexperience$d_blurred_vision, na.rm = TRUE)
no_avg_d_dizziness_o <- mean(noVRexperience$d_dizziness_o, na.rm = TRUE)
no_avg_d_dizziness_c <- mean(noVRexperience$d_dizziness_c, na.rm = TRUE)
no_avg_d_vertigo <- mean(noVRexperience$d_vertigo, na.rm = TRUE)
no_avg_d_stomach <- mean(noVRexperience$d_stomach_awareness, na.rm = TRUE)
no_avg_d_burping <- mean(noVRexperience$d_burping, na.rm = TRUE)


# Mean full SSQ score for each age group.
grp1 <- filter(filteredData, age_group == "16 to 21")
grp2 <- filter(filteredData, age_group == "22 to 29")
grp3 <- filter(filteredData, age_group == "30 to 37")
grp4 <- filter(filteredData, age_group == "38 to 45")
grp5 <- filter(filteredData, age_group == "above 45")

# na.rm = TRUE so one missing score does not blank out a whole group's mean.
# A group with no participants still yields NaN.
mean_sqq_ages <- vapply(
  list(grp1, grp2, grp3, grp4, grp5),
  function(grp) mean(grp$ssq_full, na.rm = TRUE),
  numeric(1)
)

# The labels are spelled exactly like the age_group values used for the
# filters above ("above 45", not "Above 45"), so this summary can be matched
# against filteredData$age_group.
data_by_age_group <- data.frame(
  age_group = c("16 to 21", "22 to 29", "30 to 37", "38 to 45", "above 45"),
  mean_ssq = mean_sqq_ages
)

# Label each participant with a generation derived from an approximate birth
# year (year of the session minus age; this can be off by one depending on
# whether the birthday had passed).
#
# - Missing Date or age is mapped to NA explicitly; otherwise every comparison
#   is NA and the row would fall through to the catch-all "Old" label.
# - year() is namespaced so the chunk does not depend on lubridate being
#   attached by the installed tidyverse version.
filteredData <- mutate(
  filteredData,
  generation = case_when(
    is.na(Date) | is.na(age) ~ NA_character_,
    lubridate::year(Date) - age >= 2010 ~ "a",
    lubridate::year(Date) - age >= 1997 ~ "Z",
    lubridate::year(Date) - age >= 1981 ~ "Y",
    lubridate::year(Date) - age >= 1965 ~ "X",
    TRUE ~ "Old"
  )
)

Summary of Findings

Our analysis found that individuals with prior VR experience reported heightened symptoms of motion sickness when exposed to VR, compared to those without experience. Age and gender had little to no effect on results. Further research should examine the nature, amount, and frequency of prior VR use to improve accuracy.

Initial Data Analysis (IDA)

Source

The data was sourced from Cosette Saunders’ paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”.

Structure

We specifically looked at participants who did not undergo any social modelling: 69 participants, each with 51 variables recorded. Our project focused on the following variables:

  • Baseline SSQ (BSSQ), Active SSQ (ASSQ), and ssq_full of 16 symptoms (quantitative, discrete): self-reported symptom severity before and after undergoing VR respectively, on a scale of 1 to 10.

  • Participants’ VRexperience (qualitative, nominal); sorted by experience to see how symptoms differed.

  • age of participants (quantitative, discrete); they were then sorted into age groups.

  • The change (\(\Delta\)) between BSSQ and ASSQ was calculated for each participant for each symptom, and we took the average \(\Delta\) for each symptom for each group (VRexperience and age_group) (quantitative, discrete).

Code
# Count participants per age group and show the split as a pie chart.
data_age_groups <- filteredData %>% select(age_group)
groups_counted <- count(data_age_groups, age_group)

age_pie <- plot_ly(
  groups_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  direction = "clockwise",
  sort = FALSE,
  rotation = 30,
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(title = "Distribution of ages", showlegend = TRUE)

age_pie
Code
# Count participants per VR-experience answer and show the split as a pie chart.
data_experience <- filteredData %>% select(VRexperience)
exp_counted <- count(data_experience, VRexperience)

vr_pie <- plot_ly(
  exp_counted,
  labels = ~VRexperience,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#FDFFB6", "#BDE0FE"))
) %>%
  layout(title = "Distribution of VR experience", showlegend = TRUE)

vr_pie
Code
# Age-group split among participants with prior VR experience.
data_experience <- withVRexperience %>% select(age_group)
exp_counted <- count(data_experience, age_group)

vr_pie <- plot_ly(
  exp_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of age groups for those with VR experience",
    showlegend = TRUE
  )

vr_pie
Code
# Age-group split among participants without prior VR experience.
data_experience <- noVRexperience %>% select(age_group)
exp_counted <- count(data_experience, age_group)

vr_pie <- plot_ly(
  exp_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of age groups for those without VR experience",
    showlegend = TRUE
  )

vr_pie

Limitations

  • Self-reporting bias affects both BSSQ and ASSQ, making the values prone to over- or underestimation by each participant.

  • VRexperience is a binary qualitative classification; it does not describe the nature, amount or frequency of prior VR use, and those with more than 10 VR experiences were excluded.

Research Question

Does past VR experience affect the symptoms experienced by people after a virtual reality experience?

Code
# Box plot of the full SSQ score, one box per VR-experience answer.
plt <- filteredData %>%
  ggplot(aes(x = VRexperience, y = ssq_full, fill = VRexperience)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5")) +
  labs(x = "VR Experience", y = "Full SSQ") +
  theme_minimal() +
  # The x axis already names the groups, so the fill legend is redundant.
  theme(legend.position = "none")

ggplotly(plt)

Participants with and without VR experience reported varied severity of symptoms. Interestingly, the VR-experienced group saw a greater median ssq_full and IQR, pointing to less consistent severity of symptoms. The group with VR experience also saw a higher mean value, contrasting with previous research suggesting users with VR experience undergo less motion sickness (Chattha et al., 2020). This could be due to the small sample size of our data, wherein the symptoms vary too widely for strong correlations to be found and random chance has a noticeable effect. Additionally, users who have experienced motion sickness in VR before may anticipate feeling sick again, leading to heightened symptoms, similar to a nocebo effect.

Code
library(plotly)

# Radar (scatterpolar) chart comparing the mean per-symptom change between
# participants with and without VR experience.

# Mean changes, ordered to line up with `symptom_axes` below.
yes_means <- c(
  yes_avg_d_discomfort,
  yes_avg_d_fatigue,
  yes_avg_d_headache,
  yes_avg_d_eyestrain,
  yes_avg_d_sweating,
  yes_avg_d_nausea,
  yes_avg_d_dizziness_c,
  yes_avg_d_dizziness_o,
  yes_avg_d_difficulty_focusing,
  yes_avg_d_difficulty_concentrating,
  yes_avg_d_salivation,
  yes_avg_d_vision,
  yes_avg_d_vertigo,
  yes_avg_d_stomach,
  yes_avg_d_fullness,
  yes_avg_d_burping
)

no_means <- c(
  no_avg_d_discomfort,
  no_avg_d_fatigue,
  no_avg_d_headache,
  no_avg_d_eyestrain,
  no_avg_d_sweating,
  no_avg_d_nausea,
  no_avg_d_dizziness_c,
  no_avg_d_dizziness_o,
  no_avg_d_difficulty_focusing,
  no_avg_d_difficulty_concentrating,
  no_avg_d_salivation,
  no_avg_d_vision,
  no_avg_d_vertigo,
  no_avg_d_stomach,
  no_avg_d_fullness,
  no_avg_d_burping
)

# Axis labels, defined once so both traces are guaranteed to share them.
symptom_axes <- c(
  "Discomfort", "Fatigue", "Headache", "Eyestrain", "Sweating", "Nausea",
  "Dizziness (closed)", "Dizziness (open)", "Difficulty Focusing",
  "Difficulty Concentrating", "Salivation", "Blurry Vision", "Vertigo",
  "Stomach Awareness", "Fullness of head", "Burping"
)

fig <- plot_ly(
  type = "scatterpolar",
  fill = "toself"
) %>%
  add_trace(
    r = yes_means,
    theta = symptom_axes,
    name = "With VR Experience"
  ) %>%
  add_trace(
    r = no_means,
    theta = symptom_axes,
    name = "No VR Experience"
  ) %>%
  layout(
    # The title belongs to the layout. Passed to plot_ly() it is treated as a
    # trace attribute, which scatterpolar does not have, so it never renders.
    title = "Average change in BSSQ and ASSQ depending on VR experience for each symptom",
    polar = list(
      radialaxis = list(
        visible = TRUE,
        range = c(-0.5, 2)
      )
    ),
    showlegend = TRUE
  )

fig

The spider-chart above reinforces what we see in the box-plot, with VR experienced users indicating a higher average \(\Delta\) for almost all of the 16 symptoms.

To further investigate why our conclusion contradicts research, we stratified our analysis using a potential confounding variable: age.

Code
# Full SSQ by age group, participants with VR experience only.
plt2 <- ggplot(withVRexperience, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ for each age group (With experience)", y = "Full SSQ", x = "Age Group") +
  # Zoom the y axis with coord_cartesian() rather than ylim(): ylim() removes
  # observations outside the range before the box statistics are computed,
  # which would distort the medians and quartiles if any score exceeds 60.
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)
Code
# Full SSQ by age group, participants without VR experience only.
plt2 <- ggplot(noVRexperience, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ Scores for each age group (No experience)", y = "Full SSQ", x = "Age Group") +
  # Zoom the y axis with coord_cartesian() rather than ylim(): ylim() removes
  # observations outside the range before the box statistics are computed,
  # which would distort the medians and quartiles if any score exceeds 60.
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)
Code
library(tidyverse)
library(plotly)

# Full SSQ by age group, with side-by-side boxes for each VR-experience answer.
#
# No explicit `group` aesthetic: ggplot2 groups by the interaction of the
# discrete variables (age group x VR experience) by default. Setting
# group = VRexperience overrides that, so the box statistics would be computed
# per experience level across all ages rather than per age group.
plt <- ggplot(filteredData, aes(x = as.factor(age_group), y = ssq_full, fill = VRexperience)) +
  geom_boxplot(position = "dodge") +
  labs(x = "Age Group", y = "Full SSQ", title = "Full SSQ Scores by Age Group (both groups)") +
  # coord_cartesian() zooms without discarding data; ylim() would drop scores
  # outside the range before the box statistics are computed.
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))

# boxmode = "group" makes plotly place the boxes side by side within each age group.
ggplotly(plt) %>% layout(boxmode = "group")
Code
# Full SSQ by age group, all participants.
plt2 <- ggplot(filteredData, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ Scores for each age group", y = "Full SSQ", x = "Age Group") +
  # Zoom the y axis with coord_cartesian() rather than ylim(): ylim() removes
  # observations outside the range before the box statistics are computed,
  # which would distort the medians and quartiles if any score exceeds 60.
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)

There appears to be substantial overlap between the box plots of SSQ scores across different age groups, even when filtered by experience. In both sets of graphs, the medians remain relatively close, with no consistent trend among the groups.

Notably, while the spread of SSQ scores varies, there is no consistent pattern across the age groups. Consistent with the earlier box plot, however, the spread and medians are much lower for the ‘No Experience’ condition.

These observations conflict with prior research suggesting older users experience more severe cyber-sickness (Oh & Son, 2022), possibly due to our data having unequal sample sizes across age groups. Namely, there are 29 participants in the 30-37 age group but only 7 in the 16-21 age group. This limits the reliability of comparisons.

Finally, we decided to investigate the effect of gender on our data.

Code
library(tidyverse)
library(plotly)

# Full SSQ by gender, with side-by-side boxes for each VR-experience answer.
#
# No explicit `group` aesthetic: ggplot2 groups by the interaction of the
# discrete variables (gender x VR experience) by default. Setting
# group = VRexperience overrides that, so the box statistics would be computed
# per experience level across both genders rather than per gender.
plt <- ggplot(filteredData, aes(x = as.factor(gender), y = ssq_full, fill = VRexperience)) +
  geom_boxplot() +
  labs(x = "Gender", y = "Full SSQ") +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5"))

# boxmode = "group" makes plotly place the boxes side by side within each gender.
ggplotly(plt) %>% layout(boxmode = "group")
Code
library(tidyverse)
library(plotly)

# Box plot of the full SSQ score, one box per gender.
plt <- filteredData %>%
  ggplot(aes(x = as.factor(gender), y = ssq_full, fill = gender)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5")) +
  xlab("Gender") +
  ylab("Full SSQ") +
  # The x axis already names the groups, so the fill legend is redundant.
  theme(legend.position = "none")

ggplotly(plt)

The box plots above indicate that females with VR experience report greater symptoms, agreeing with online research “indicat[ing] excruciating symptoms of motion sickness in females” (Chattha et al., 2020), but for participants without VR experience our data contradicts this conclusion—overall, we found little difference between genders.

Professional Standard of Report

We maintained a shared value of respect as we respected the privacy of others and the promises of confidentiality given to them as the data we used was anonymous. We obeyed ethical principles by pursuing objectivity as we presented all findings holistically, and in a transparent manner irrespective of outcomes.

Acknowledgements

Group Meetings

13 March, 13:00

In attendance: Bill, Borna, Gokul (Ryan), Mohamed

We confirmed our research question and the variables we will be using for analysis, discussed our ideas on what types of graphs we can use to present the data, and decided on roles for our group members to take up over the next few weeks.

Resources Used

Our group collaborated on the project using GitHub (https://github.com/uncoolbeans/DATA1901_project_1)

Additionally, we reviewed the official documentation for R, ggplot, plot_ly, and quarto

AI Usage

AI (ChatGPT) was used to clean up our code for one version pushed to GitHub. This was overwritten. In our final submission, there is no use of any AI.

References

Chattha, U. A., Janjua, U. I., Anwar, F., Madni, T. M., Cheema, M. F., & Janjua, S. I. (2020). Motion Sickness in Virtual Reality: An Empirical Evaluation. IEEE Access, 8, 130486–130499. https://doi.org/10.1109/access.2020.3007076

‌Oh, H., & Son, W. (2022). Cybersickness and Its Severity Arising from Virtual Reality Content: A Comprehensive Study. Sensors, 22(4), 1314. https://doi.org/10.3390/s22041314